{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nX_train_log = np.log1p(X_train)\\nfeat_names = columns_org + \"_log\"\\nX_train_log = pd.DataFrame(columns = feat_names, data = X_train_log.values)\\n'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "\n",
    "#读取训练数据\n",
    "train_data_path=open(r'C:\\Download\\data\\Otto_train.csv')\n",
    "train_data=pd.read_csv(train_data_path)\n",
    "\n",
    "y_train = train_data['target']\n",
    "#train_id = train_data['id']\n",
    "X_train = train_data.drop([\"id\", \"target\"], axis=1)\n",
    "X_train_part,X_test, y_train_part, y_test =train_test_split(X_train,y_train,test_size=10000, random_state=0)\n",
    "#train,test=train_test_split(train_data,test_size=10000)#抽取10000作为训练集\n",
    "columns_org = X_train.columns\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
       "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
       "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
       "     verbose=0)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.svm import LinearSVC\n",
    "SVC1 = LinearSVC()\n",
    "SVC1.fit(X_train_part,y_train_part)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy is:  0.7498\n",
      "Confusion matrix:\n",
      "[[  89   35    1    0    1   25    5   66   72]\n",
      " [   1 2234  252    8    6   14   20   11    3]\n",
      " [   1  855  360    6    3    4   39   10    1]\n",
      " [   0  308   38   66    3   29   13    2    0]\n",
      " [   0   18    0    0  403    1    0    2    0]\n",
      " [  14   34    3    4    0 2133   37   46   37]\n",
      " [   8   96   20    0    1   38  278   29    4]\n",
      " [  16   35    7    0    2   49   11 1242   16]\n",
      " [  23   35    0    2    1   33    7   41  693]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import confusion_matrix\n",
    "y_predict = SVC1.predict(X_test)\n",
    "\n",
    "print(\"accuracy is: \",accuracy_score(y_test, y_predict))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_grid_point_Linear(C, X_train, y_train, X_test, y_test):\n",
    "    # 在训练集上训练SVC\n",
    "    SVC2 =  LinearSVC( C = C)\n",
    "    SVC2 = SVC2.fit(X_train, y_train)\n",
    "    \n",
    "    # 在校验集上返回accuracy\n",
    "    accuracy = SVC2.score(X_test, y_test)\n",
    "    \n",
    "    print(\"C= {} : accuracy= {} \" .format(C, accuracy))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 0.1 : accuracy= 0.7481 \n",
      "C= 1.0 : accuracy= 0.7473 \n",
      "C= 10.0 : accuracy= 0.7008 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No handles with labels found to put in legend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 100.0 : accuracy= 0.6545 \n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmUFNX5//H3MzAwICKrG8imxDVqdEQTdVgEBDW4K7hhEjWb/tSvS8gR4xYJbohGEBH1uAOCCoqKyKpJUAaXIBAESdRRooiaiCwqPr8/bk9shhm6mZnq6uXzOqePXdW3m6emcT7UvVX3mrsjIiKyNUVxFyAiItlPYSEiIikpLEREJCWFhYiIpKSwEBGRlBQWIiKSksJCRERSUliIiEhKCgsREUmpYdwF1Jc2bdp4p06d4i5DRCSnLFy48FN3b5uqXd6ERadOnSgvL4+7DBGRnGJm76XTTt1QIiKSksJCRERSUliIiEhKeTNmISJS6L755hsqKirYsGHDFq+VlJTQvn17iouLa/XZCgsRkTxRUVHB9ttvT6dOnTCz/+13d9asWUNFRQWdO3eu1WerG0pEJE9s2LCB1q1bbxYUAGZG69atqz3jSJfCQkQkj1QNilT701Xw3VBffQXDh0PDhuFRXPz981Tb29K2pvcWFUEdv0MRkcgVfFisXQvDhsF338VXQyZDKorA25ZthaNIbir4sNhpJ9i0KYTFt99+//jmm9pvR9W2uvdu3Fi79+ZCOB58MNx9NzRtGl+tIrnG3avtcnL3On1uwYdFpaIiaNQoPArBtoRjpoIzeXv9enj4YfjgA5g6FZo1i/snJpL9SkpKWLNmzRaD3JVXQ5WUlNT6sxUWBSoXwvGRR2DwYOjXD6ZNgx12iLsikezWvn17KioqWL169RavVd5nUVsKC8laZ50FjRvDGWdAnz4wfTq0bBl3VSLZq7i4uNb3UaSiS2clq516KkyeDG+9Bb16QTX/YBKRDFBYSNYbMACmTIF//AN69oR//zvuikQKj8JCckLluMU//wndu8OHH8ZdkUhhUVhIzujVK4xbrFoFZWXwXlpLtohIfVBYSE454gh46SX47LMQGCtWxF2RSGFQWEjO6dYNZs0KU7WUlYWxDBGJlsJCctKPfgRz5oSbC7t3h0WL4q5IJL8pLCRn7bcfzJ0bpgfp0QNefz3uikTyl8JCctqee8K8eWE6kF694NVX465IJD9FGhZm1s/MlpnZCjMbUs3rt5vZm4nHO2b2RZXXm5vZh2Z2V5R1Sm7bffcQGG3aQO/e8PLLcVckkn8iCwszawCMAvoD+wCDzGyf5Dbufqm7H+juBwJ/Bp6s8jE3AHOjqlHyR8eOoUuqXbtwT8bMmXFXJJJfojyz6AascPeV7v41MB44fivtBwGPV26Y2cHATsCLEdYoeaRduxAYXbrAscfC88/HXZFI/ogyLNoBHyRtVyT2bcHMOgKdgVmJ7SLgNuCKrf0BZnaBmZWbWXl1syxK4dlpJ5g9G/bZB44/PkwTIiJ1F2VYVLceWk2rbwwEJrn7psT2b4Dn3P2DGtqHD3Mf6+6l7l7atm3bOpQq+aRNm9ANddBBcMopMHFi3BWJ5L4opyivAHZL2m4PfFRD24HAb5O2fwwcaWa/AZoBjcxsrbtvMUguUp2WLeHFF0N31KBBYUXBs8+OuyqR3BVlWCwAuppZZ+BDQiCcUbWRme0JtAT+VrnP3c9Mev1coFRBIduqeXN44YUwa+3gwbBhA5x/ftxVieSmyLqh3P1b4EJgOrAUmOjui83sejMbkNR0EDDe67pArEg1ttsOnn0Wjj4aLrgA7tJF2CK1YvnyO7q0tNTLy8vjLkOy1MaNcPrpYcD7llvg8svjrkgkO5jZQncvTdVOd3BLQWjcGJ54Iqy8d8UV8Mc/xl2RSG7RGtxSMIqL4bHHoKQErr46jGHccANYddftichmFBZSUBo2hAceCGcaN94YAuOWWxQYIqkoLKTgNGgA99wTAuO220Jg3HknFKlTVqRGCgspSEVF8Oc/hy6p224LA+BjxoQgEZEtKSykYJmFLqgmTcKA98aNcP/9oatKRDan/y2koJmFQe7Gjb8f9H700TAYLiLfU1iIAEOHhjOMyy+Hr7+GCRNCgIhIoCE9kYTLLgt3eE+ZAiecAOvXx12RSPZQWIgk+e1v4d57Yfp0OO44+OqruCsSyQ4KC5EqzjsPHnwQ5swJq+79979xVyQSP4WFSDXOPhsefxzmz4c+feDzz+OuSCReCguRGpx2GkyaBG+8AUcdBZ9+GndFIvFRWIhsReXSrEuXQs+e8PHHcVckEg+FhUgK/fuHNTFWroTu3eHDD+OuSCTzFBYiaTjqqLDq3ocfQlkZvPde3BWJZJbCQiRNRx4JM2bAmjUhMN59N+6KRDJHYSGyDQ47DGbNgrVrQ2AsWxZ3RSKZobAQ2UYHHRTuwfj22zCG8fbbcVckEj2FhUgt/PCHMHdumOq8R49wea1IPlNYiNTSXnvBvHmw3XbQqxe89lrcFYlER2EhUgd77BECo1Ur6N0bXnkl7opEoqGwEKmjjh1DYOyyCxx9dBgAF8k3CguRetCuXRjD6NwZjj023JMhkk8UFiL1ZOedw1VSe+0VpgmZOjXuikTqj8JCpB61aRO6oQ44AE4+GZ54Iu6KROqHwkKknrVsCS+9BIceCgMHwiOPxF2RSN0pLEQi0Lx5GLfo3h3OOQfuuy/uikTqRmEhEpFmzWDaNOjbN6y+N3p03BWJ1J7CQiRCTZqE9TB++tOwvveIEXFXJFI7CguRiDVuHFbcO+UUuOwyGDYs7opEtl3DuAsQKQSNGoU1vRs3hquugg0b4LrrwCzuykTSo7AQyZCGDeHBB6GkBG64IQTGTTcpMCQ3RBoWZtYPuANoAIxz9+FVXr8d6JnYbArs6O4tzOxA4G6gObAJuNHdJ0RZq0gmNGgAY8eGM4xbbgmBMXJkmL1WJJtFFhZm1gAYBfQBKoAFZjbV3ZdUtnH3S5PaXwT8KLG5DjjH3Zeb2a7AQjOb7u5fRFWvSKYUFcFdd4UzjBEjQmCMGaPAkOwW5ZlFN2CFu68EMLPxwPHAkhraDwKuAXD3dyp3uvtHZvYJ0BZQWEheMINbbw2BMWwYbNwY7sVoqI5hyVJR/tVsB3yQtF0BHFpdQzPrCHQGtpiv08y6AY0ArXgsecUMbrwxXF579dUhMB5+GIqL465MZEtRhkV1w3ZeQ9uBwCR337TZB5jtAjwMDHb377b4A8wuAC4A6NChQ92qFYnJ0KFhDOPKK0NgjB8ftkWySZS9pBXAbknb7YGPamg7EHg8eYeZNQemAUPdfX51b3L3se5e6u6lbdu2rYeSReJxxRVw553w9NNw0klhHEMkm0QZFguArmbW2cwaEQJhi0mbzWxPoCXwt6R9jYCngIfcXfN2SkG46CK45x54/vlwx/dXX8Vdkcj3IgsLd/8WuBCYDiwFJrr7YjO73swGJDUdBIx39+QuqtOAMuBcM3sz8TgwqlpFssUFF8ADD4Rpzvv3hy+/jLsikcA2/x2du0pLS728vDzuMkTqxYQJcOaZcMgh4UyjRYu4K5J8ZWYL3b00VTtd2S2ShU4/PSyctHAhHHUUrFkTd0VS6BQWIlnqxBPDgPfixdCzJ3zySdwVSSFTWIhksWOOgWefhRUrwkJKH9V0PaFIxBQWIlmud++w6l5FBZSVwfvvx12RFCKFhUgOKCuDGTPg00/D85Ur465ICo3CQiRHHHYYzJwZLqctK4N33kn9HpH6orAQySEHHwyzZ8PXX4fAWLw47oqkUCgsRHLM/vvD3LlhSvMePeDNN+OuSAqBwkIkB+29N8ybF2as7dkTFiyIuyLJdwoLkRy1xx4hMFq2DDfu/eUvcVck+UxhIZLDOnUKgbHzznD00TBnTtwVSb5SWIjkuPbtwxhGx45h8sHp0+OuSPKRwkIkD+yySzir2HNPGDAAnnkm7ook3ygsRPJE27ZhavMDDggLKE2eHHdFkk/SCgszm2xmx5qZwkUki7VqFe707tYtzFz72GNxVyT5It1f/ncDZwDLzWy4me0VYU0iUgc77BDGLY48Es46KyymJFJXaYWFu7/k7mcCBwH/AmaY2V/N7GdmVhxlgSKy7Zo1g2nToE8f+PnPYcyYuCuSXJd2t5KZtQbOBc4D3gDuIITHjEgqE5E6adoUpkwJ63n/+tcwcmTcFUkua5hOIzN7EtgLeBj4qbuvSrw0wcy0lqlIliopgUmT4Iwz4NJLYcMGGDIk7qokF6UVFsBd7j6ruhfSWbtVROLTqBGMHw+DB8Pvfx8C45prwCzuyiSXpBsWe5vZ6+7+BYCZtQQGufvo6EoTkfrSsCE89BA0bgzXXRcC409/UmBI+tIdszi/MigA3P1z4PxoShKRKDRoAOPGhfGLm26CSy4B97irklyR7plFkZmZe/irZWYNgEbRlSUiUSgqglGjwhnGyJGwcSOMHh32i2xNumExHZhoZmMAB34FvBBZVSISGTMYMSIMfg8fHgJj3Lhw5iFSk3TD4nfAL4FfAwa8CIyLqigRiZYZDBsW1sO45powhvHQQ1Csu6akBmmFhbt/R7iL++5oyxGRTDGDP/whdEkNGRLOMMaPD1dPiVSV7n0WXYE/AfsAJZX73b1LRHWJSIb87nehS+qSS8IEhJMmhW2RZOkOaz1AOKv4FugJPES4QU9E8sDFF4cpQaZNC1Ocr1sXd0WSbdINiybuPhMwd3/P3a8FekVXlohk2i9/GSYdnDkzLKL05ZdxVyTZJN0B7g2J6cmXm9mFwIfAjtGVJSJxOPfcMIZx9tlhmdbnnw+z2Iqke2ZxCdAU+H/AwcBZwOCoihKR+AwaBBMnQnk5HHUUfPZZ3BVJNkgZFokb8E5z97XuXuHuP3P3k919fgbqE5EYnHQSPPUUvP029OwJq1fHXZHELWVYuPsm4GAzzSIjUkiOPTas5b18OXTvDqtWpX6P5K90u6HeAKaY2dlmdlLlI9WbzKyfmS0zsxVmtsXEyGZ2u5m9mXi8Y2ZfJL022MyWJx7q8hKJQZ8+Ydzi/fehrAw++CDuiiQu6Q5wtwLWsPkVUA48WdMbEt1Xo4A+QAWwwMymuvuS/32A+6VJ7S8CfpR43gq4BihN/DkLE+/9PM16RaSedO8OL74YrpAqK4NZs6Bz57irkkxL9w7un9Xis7sBK9x9JYCZjQeOB5bU0H4QISAAjgZmuPtniffOAPoBj9eiDhGpo5/8JFxS27dvCIyZM+EHP4i7KsmkdO/gfoDwL/zNuPvPt/K2dkDySWsFcGgNn98R6AxULrBU3XvbpVOriESjtBTmzIHevUNgvPQS7Ldf3FVJpqQ7ZvEsMC3xmAk0B9ameE91A+I1zZ4/EJiUGExP+71mdoGZlZtZ+WpdriESuf33h7lzw5TmPXrAG2/EXZFkSlph4e6Tkx6PAqcBqf5NUQHslrTdHviohrYD2byLKa33uvtYdy9199K2bdumOgwRqQd77w3z5sF220GvXvDqq3FXJJlQ2yVPugIdUrRZAHQ1s85m1ogQCFOrNjKzPYGWwN+Sdk8H+ppZy8QSrn0T+0QkC+yxRwiMVq1Ct9TLL8ddkUQtrbAwsy/N7L+VD+AZwhoXNXL3b4ELCb/klwIT3X2xmV1vZgOSmg4Cxleuwpd472fADYTAWQBcXznYLSLZoWPHEBjt20O/fmEMQ/KXeZ4swltaWurl5eVxlyFScD75JJxdvPMOTJ4cbuaT3GFmC929NFW7dM8sTjSzHZK2W5jZCXUpUETyw447wuzZ4cqoE0+EJ2u8+0pyWbpjFte4+38qN9z9C76/J0JEClzr1uHei0MOgdNOg8cei7siqW/phkV17dK9+1tECsAOO8D06XDkkXDWWXD//XFXJPUp3bAoN7MRZra7mXUxs9uBhVEWJiK5p1mzsNpe377wi1/AqFFxVyT1Jd2wuAj4GpgATATWA7+NqigRyV1Nm8KUKWF51gsvhNtui7siqQ/pzg31FbDFrLEiItVp3BgmTYIzz4TLL4f162Ho0LirkrpI92qoGWbWImm7pZnpJjkRqVFxcRjoPuccuPpquOoqyJMr9QtSuoPUbRJXQAHg7p+bmdbgFpGtatgQHngASkpg2DBYtw5GjAAtpZZ70g2L78ysg7u/D2Bmnah5UkARkf8pKoIxY0JgjBwZuqRGjw77JXekGxZXAa+Y2dzEdhlwQTQliUi+MQtB0bQpDB8OGzbAffdBgwZxVybpSneA+wUzKyUExJvAFMIVUSIiaTELXVFNm8If/hAC4+GHw9iGZL90Fz86D7iYMFX4m8BhhFlie23tfSIiyczCYHdJCVx5ZQiMCRPC1VOS3dLtNbwYOAR4z917EtbK1mpDIlIrV1wBf/5zuB/jhBPCOIZkt3TDYoO7bwAws8bu/g9gz+jKEpF8d+GFcO+9YYqQY4+FtanW3pRYpTvAXZG4z+JpYIaZfU7Nq96JiKTlvPOgSRMYPDisiTFtWphjSrJPugPcJyaeXmtms4EdgBciq0pECsaZZ4YxjIEDw7oY06eHFfgku2zzlc7uPtfdp7r711EUJCKF5+ST4amn4O9/h549w4JKkl10W4yIZIXjjoNnn4Xly6FHD/hIHd1ZRWEhIlmjTx944QX44AMoK4P33ou7IqmksBCRrFJWBjNmwKefhufvvht3RQIKCxHJQocdBrNmwVdfhcD4xz/irkgUFiKSlQ46CObMgU2boHt3WLQo7ooKm8JCRLLWfvvB3Llh/qgePWChFnOOjcJCRLLannvCvHnQvDn06gV//WvcFRUmhYWIZL0uXUJg7Lgj9O0buqcksxQWIpITdtstBEbHjtC/f7jTWzJHYSEiOWOXXcJZxV57wYABMHVq3BUVDoWFiOSUtm3DZbUHHhimCXniibgrKgwKCxHJOS1bhhv3DjssTED48MNxV5T/FBYikpOaNw9Tg/ToEaY4Hzs27orym8JCRHLWdtuFyQf794df/hLuvDPuivKXwkJEclqTJmF68xNPhIsvhptuirui/KSwEJGc16gRTJgAgwbBkCFw7bXgHndV+SXSsDCzfma2zMxWmNmQGtqcZmZLzGyxmT2WtP/mxL6lZnanmVmUtYpIbisuDgPdP/sZXHddCA0FRv1Jdw3ubWZmDYBRQB+gAlhgZlPdfUlSm67A74HD3f1zM9sxsf8nwOHA/ommrwDdgTlR1Ssiua9BAxg3LizTevPNsG4d3HEHFKkPpc4iCwugG7DC3VcCmNl44HhgSVKb84FR7v45gLtXLqboQAnQCDCgGPg4wlpFJE8UFcGoUWEsY8QI2LABxowJQSK1F2VYtAM+SNquAA6t0uYHAGb2F6ABcK27v+DufzOz2cAqQljc5e5LI6xVRPKIGdx6KzRtCn/8YwiMBx6AhlH+xstzUf7oqhtjqNqD2BDoCvQA2gMvm9l+QBtg78Q+gBlmVubu8zb7A8wuAC4A6NChQ/1VLiI5zwxuuCF0SQ0dGgLj0UfDYLhsuyjDogLYLWm7PVB1CfYKYL67fwP808yW8X14zHf3tQBm9jxwGLBZWLj7WGAsQGlpqYayRGQLV10VzjD+7/9g40aYODEEiGybKId9FgBdzayzmTUCBgJVp/16GugJYGZtCN1SK4H3ge5m1tDMigmD2+qGEpFaufRSGD0annkmTEC4bl3cFeWeyMLC3b8FLgSmE37RT3T3xWZ2vZkNSDSbDqwxsyXAbOAKd18DTALeBRYBbwFvufszUdUqIvnv178O4xYzZ4Y7vr/8Mu6Kcot5nlyIXFpa6uXl5XGXISJZbvx4OOssOOQQeP55aNEi7oriZWYL3b00VTtdfSwiBWXgwDCt+cKFYZnWTz+Nu6LcoLAQkYJz4okwZQosXQo9e8LHuosrJYWFiBSk/v1h2jRYuRLKyqCiIu6KspvCQkQKVq9eYS3vVatCYPzrX3FXlL0UFiJS0I44Ilwh9cUXITCWL4+7ouyksBCRgnfIITB7NqxfHwJjyZLU7yk0CgsREeCAA2Du3PC8e3d4881468k2CgsRkYR99oF588KMtT17wmuvxV1R9lBYiIgk6do1BEarVtC7N7zyStwVZQeFhYhIFZ06hcDYdVc4+ugwAF7oFBYiItVo1y6MYXTpAsceC889F3dF8VJYiIjUYKedwlVS++4LJ5wATz0Vd0XxUViIiGxFmzahG+rgg+HUU8NEhIVIYSEikkKLFvDii3D44XDGGWGq80KjsBARScP224cpzXv3hp//HO6+O+6KMkthISKSpqZNYepU+OlP4Te/gdtvj7uizFFYiIhsg5ISmDQJTjklrOt9441xV5QZDeMuQEQk1zRqBI8/HoJj6NAwp9QNN4BZ3JVFR2EhIlILDRvCgw+GwLjxxhAYt96av4GhsBARqaWiIrjnnjCX1IgRITDuuivszzcKCxGROigqgjvuCIFx882wYQPcey80aBB3ZfVLYSEiUkdmMHx4uFrq2mvDGcZDD0FxcdyV1R+FhYhIPTCDa64JYxhDhsDGjWEQvHHjuCurH3nYsyYiEp/f/Q7uvDPMI3XSSeEsIx8oLERE6tlFF8HYseGO7+OOg6++iruiulNYiIhE4Pzzw6W1c+aENTH++9+4K6obhYWISETOPjvMUvvqq2FOqc8+i7ui2lNYiIhE6NRTYfJkeOst6NULVq+Ou6LaUViIiERswAB45hl45x3o0QNWrYq7om2nsBARyYC+fcOA93vvQVkZvP9+3BVtG4WFiEiGdO8OM2bAJ5+EwFi5Mu6K0qewEBHJoB//GGbNgi+/DIGxbFncFaVHYSEikmEHHxwuqf3mmxAYixbFXVFqCgsRkRj88Icwd26Y6rxHD3j99bgr2rpIw8LM+pnZMjNbYWZDamhzmpktMbPFZvZY0v4OZvaimS1NvN4pylpFRDJtr71g3jxo1ixcVjt/ftwV1SyysDCzBsAooD+wDzDIzPap0qYr8HvgcHffF7gk6eWHgFvcfW+gG/BJVLWKiMRl993h5ZehTRvo0yeERzaK8syiG7DC3Ve6+9fAeOD4Km3OB0a5++cA7v4JQCJUGrr7jMT+te6+LsJaRURi06FDCInddoN+/cIVU9kmyrBoB3yQtF2R2JfsB8APzOwvZjbfzPol7f/CzJ40szfM7JbEmcpmzOwCMys3s/LVuXpbpIgIsOuuYdC7a9cw+eAzz8Rd0eaiDIvqVqL1KtsNga5AD2AQMM7MWiT2HwlcDhwCdAHO3eLD3Me6e6m7l7Zt27b+KhcRicGOO8Ls2bD//mF680mT4q7oe1GGRQWwW9J2e+CjatpMcfdv3P2fwDJCeFQAbyS6sL4FngYOirBWEZGs0KoVvPQSdOsGp58OjzwSd0VBlGGxAOhqZp3NrBEwEJhapc3TQE8AM2tD6H5amXhvSzOrPF3oBSyJsFYRkayxww4wfXq44/ucc2DcuLgrijAsEmcEFwLTgaXARHdfbGbXm9mARLPpwBozWwLMBq5w9zXuvonQBTXTzBYRurTujapWEZFs06wZTJsW1sI4/3y466546zH3qsMIuam0tNTLy8vjLkNEpF5t3Bi6o6ZMgZtvhiuuqN/PN7OF7l6aqp3u4BYRyWKNG8MTT4TAuPJKuP56iOPf+A0z/0eKiMi2KC6GRx+FJk3gmmtg/XoYNgysumtOI6KwEBHJAQ0awH33QUkJDB8O69bByJGZCwyFhYhIjigqgtGjQ2CMHAkbNsDdd4f9UVNYiIjkEDMYMQKaNg1dUevXw/33h9lro6SwEBHJMWZw441hDOPqq8MZxuOPh66qqCgsRERy1NChITC++CLaoACFhYhITrvsssz8ObrPQkREUlJYiIhISgoLERFJSWEhIiIpKSxERCQlhYWIiKSksBARkZQUFiIiklLeLH5kZquB9+rwEW2AT+upnDjly3GAjiVb5cux5MtxQN2OpaO7t03VKG/Coq7MrDyd1aKyXb4cB+hYslW+HEu+HAdk5ljUDSUiIikpLEREJCWFxffGxl1APcmX4wAdS7bKl2PJl+OADByLxixERCQlnVmIiEhKBRsWZnaqmS02s+/MrMarCMysn5ktM7MVZjYkkzWmw8xamdkMM1ue+G/LGtptMrM3E4+pma5za1L9jM2ssZlNSLz+qpl1ynyV6UnjWM41s9VJ38V5cdSZipndb2afmNnbNbxuZnZn4jj/bmYHZbrGdKRxHD3M7D9J38cfMl1jusxsNzObbWZLE7+7Lq6mTXTfi7sX5APYG9gTmAOU1tCmAfAu0AVoBLwF7BN37VVqvBkYkng+BLiphnZr4661tj9j4DfAmMTzgcCEuOuuw7GcC9wVd61pHEsZcBDwdg2vHwM8DxhwGPBq3DXX8jh6AM/GXWeax7ILcFDi+fbAO9X8/YrseynYMwt3X+ruy1I06wascPeV7v41MB44PvrqtsnxwIOJ5w8CJ8RYS22k8zNOPsZJwFFmZhmsMV258PclLe4+D/hsK02OBx7yYD7Qwsx2yUx16UvjOHKGu69y99cTz78ElgLtqjSL7Hsp2LBIUzvgg6TtCrb8cuK2k7uvgvCXCdixhnYlZlZuZvPNLJsCJZ2f8f/auPu3wH+A1hmpbtuk+/fl5EQXwSQz2y0zpdW7XPh/I10/NrO3zOx5M9s37mLSkeiK/RHwapWXIvte8noNbjN7Cdi5mpeucvcp6XxENfsyfvnY1o5jGz6mg7t/ZGZdgFlmtsjd362fCusknZ9xVnwPaUinzmeAx919o5n9inDG1CvyyupfrnwnqbxOmO5irZkdAzwNdI25pq0ys2bAZOASd/9v1ZereUu9fC95HRbu3ruOH1EBJP/Lrz3wUR0/c5tt7TjM7GMz28XdVyVONz+p4TM+Svx3pZnNIfyrJBvCIp2fcWWbCjNrCOxAdnYtpDwWd1+TtHkvcFMG6opCVvy/UVfJv2zd/TkzG21mbdw9K+eMMrNiQlA86u5PVtMksu9F3VBbtwDoamadzawRYXA1q64kItQzOPF8MLDFGZOZtTSzxonnbYDDgSUZq3Dr0vkZJx/jKcAsT4zmZZmUx1Kl/3gAod85F00FzkkBkS8xAAAC80lEQVRcfXMY8J/K7tBcYmY7V45/mVk3wu/ENVt/VzwSdd4HLHX3ETU0i+57iXuEP64HcCIhhTcCHwPTE/t3BZ5LancM4aqDdwndV7HXXuU4WgMzgeWJ/7ZK7C8FxiWe/wRYRLg6ZxHwi7jrrnIMW/yMgeuBAYnnJcATwArgNaBL3DXX4Vj+BCxOfBezgb3irrmG43gcWAV8k/j/5BfAr4BfJV43YFTiOBdRwxWFcT/SOI4Lk76P+cBP4q55K8dyBKFL6e/Am4nHMZn6XnQHt4iIpKRuKBERSUlhISIiKSksREQkJYWFiIikpLAQEZGUFBYi28DM1tbx/ZMSd9FjZs3M7B4zezcxi+g8MzvUzBolnuf1TbOSWxQWIhmSmHeogbuvTOwaR7gTvau770uYkbaNh0kIZwKnx1KoSDUUFiK1kLhD9hYze9vMFpnZ6Yn9RYkpIxab2bNm9pyZnZJ425kk7rA3s92BQ4Gh7v4dhKlY3H1aou3TifYiWUGnuSK1cxJwIHAA0AZYYGbzCFOpdAJ+SJgBeClwf+I9hxPuKAbYF3jT3TfV8PlvA4dEUrlILejMQqR2jiDMHrvJ3T8G5hJ+uR8BPOHu37n7vwlTelTaBVidzocnQuRrM9u+nusWqRWFhUjt1LT40tYWZVpPmOcKwnxEB5jZ1v4fbAxsqEVtIvVOYSFSO/OA082sgZm1JSzf+RrwCmFxoyIz24mwbGelpcAeAB7WEikHrkua9bSrmR2feN4aWO3u32TqgES2RmEhUjtPEWb/fAuYBVyZ6HaaTJjd9G3gHsJKZv9JvGcam4fHeYRFrVaY2SLC+haVaw/0BJ6L9hBE0qdZZ0XqmZk187DyWmvC2cbh7v5vM2tCGMM4fCsD25Wf8STwe0+9TrxIRuhqKJH696yZtQAaATckzjhw9/Vmdg1hTeT3a3pzYuGkpxUUkk10ZiEiIilpzEJERFJSWIiISEoKCxERSUlhISIiKSksREQkJYWFiIik9P8B0NEl3goJMZQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "#只调c\n",
    "C_s = np.logspace(-1, 2, 4)# logspace(a,b,N)把10的a次方到10的b次方区间分成N份  \n",
    "#penalty_s = ['l1','l2']\n",
    "\n",
    "accuracy_s = []\n",
    "for i, oneC in enumerate(C_s):\n",
    "#    for j, penalty in enumerate(penalty_s):\n",
    "    tmp = fit_grid_point_Linear(oneC, X_train_part, y_train_part, X_test, y_test)\n",
    "    accuracy_s.append(tmp)\n",
    "\n",
    "x_axis = np.log10(C_s)\n",
    "#for j, penalty in enumerate(penalty_s):\n",
    "plt.plot(x_axis, np.array(accuracy_s), 'b-')\n",
    "    \n",
    "plt.legend()\n",
    "plt.xlabel( 'log(C)' )                                                                                                      \n",
    "plt.ylabel( 'accuracy' )\n",
    "#plt.savefig('SVM_Otto.png' )\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.1\n"
     ]
    }
   ],
   "source": [
    "index = np.argmax(accuracy_s, axis=None)#寻找最佳超参数\n",
    "Best_C = C_s[ index ]\n",
    "print(Best_C)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearSVC(C=0.1, class_weight=None, dual=True, fit_intercept=True,\n",
       "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
       "     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
       "     verbose=0)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "SVC3 = LinearSVC(C = Best_C)\n",
    "SVC3.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC\n",
    "def fit_grid_point_RBF(C, gamma, X_train, y_train, X_test, y_test):\n",
    "\n",
    "    SVC3 = SVC( C = C, kernel='rbf', gamma = gamma)\n",
    "    SVC3 = SVC3.fit(X_train, y_train)\n",
    "    accuracy = SVC3.score(X_test, y_test)\n",
    "    \n",
    "    print(\"C= {} and gamma = {}: accuracy= {} \" .format(C, gamma, accuracy))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_s = np.matrix(np.zeros(shape=(5, 3)), float)\n",
    "gamma_s = np.logspace(-1, 1, 3)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 0.1 and gamma = 0.1: accuracy= 0.3723 \n"
     ]
    }
   ],
   "source": [
    "oneC = 0.1\n",
    "\n",
    "for j, gamma in enumerate(gamma_s):\n",
    "    accuracy_s[0,j] = fit_grid_point_RBF(oneC, gamma, X_train_part, y_train_part, X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oneC = 1\n",
    "\n",
    "for j, gamma in enumerate(gamma_s):\n",
    "    accuracy_s[1,j] = fit_grid_point_RBF(oneC, gamma, X_train_part, y_train_part, X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oneC = 10\n",
    "\n",
    "for j, gamma in enumerate(gamma_s):\n",
    "    accuracy_s[2,j] = fit_grid_point_RBF(oneC, gamma, X_train_part, y_train_part, X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oneC = 100\n",
    "\n",
    "for j, gamma in enumerate(gamma_s):\n",
    "    accuracy_s[3,j] = fit_grid_point_RBF(oneC, gamma, X_train_part, y_train_part, X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "当使用RBF训练SVM时，正则参数C和核函数宽度参数gamma都会影响模型的复杂度。C越小，决策边界越平滑，C越大，决策边界越不平滑，对该模型分类效果越好；gamma越大，对应的RBF标准差下降，影响范围减小，决策边界越不平滑,gamma越小，决策边界越平滑。\n",
    "C是惩罚系数，即对误差的宽容度。c越高，说明越不能容忍出现误差,容易过拟合。C越小，容易欠拟合。C过大或过小，泛化能力变差。\n",
    "gamma是选择RBF函数作为kernel后，该函数自带的一个参数。隐含地决定了数据映射到新的特征空间后的分布，gamma越大，支持向量越少，gamma值越小，支持向量越多。支持向量的个数影响训练与预测的速度。gamma设的太大，训练集上准确率很高，测试集准确率却不理想。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
